# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import redis,re

class MastergoodreadPipeline(object):
    """Deduplicate scraped Goodreads book URLs via Redis and queue them.

    Each incoming item carries a relative ``url``.  The numeric book id is
    extracted and recorded in the Redis set ``books:id``; URLs whose id has
    not been seen before are pushed onto the ``booksdetail:start_urls`` list
    for a follow-up detail spider, while URLs that do not match the expected
    ``/book/show/<id>`` shape are pushed onto ``booksdetail:no_urls`` for
    later inspection instead of being silently dropped.
    """

    # Raw string: '\.' in a plain string literal is an invalid escape
    # sequence (SyntaxWarning on Python >= 3.12); inside a character class
    # the dot needs no escaping at all.  Compiled once at class level so the
    # pattern is not re-parsed for every item.
    BOOK_ID_RE = re.compile(r'/book/show/([0-9]+)[-.]')
    BASE_URL = 'https://www.goodreads.com'

    def __init__(self, host, port):
        """Open the Redis connection used for deduplication and queuing.

        :param host: Redis server host (taken from the REDIS_HOST setting).
        :param port: Redis server port (taken from the REDIS_PORT setting).
        """
        self.r = redis.Redis(host=host, port=port, decode_responses=True)

    @classmethod
    def from_crawler(cls, crawler):
        """Scrapy factory hook: build the pipeline from crawler settings."""
        return cls(
            host=crawler.settings.get("REDIS_HOST"),
            port=crawler.settings.get("REDIS_PORT"),
        )

    def process_item(self, item, spider):
        """Route each scraped URL into the appropriate Redis queue.

        ``sadd`` returns 1 only when the id was not already a member of the
        set, so each book is queued at most once across all spider runs.

        Returns the item so later pipeline stages still receive it (the
        original implementation implicitly returned None, which would hand
        None to any downstream pipeline).
        """
        match = self.BOOK_ID_RE.search(item['url'])
        if match:
            if self.r.sadd('books:id', match.group(1)):
                self.r.lpush('booksdetail:start_urls', self.BASE_URL + item['url'])
        else:
            self.r.lpush('booksdetail:no_urls', self.BASE_URL + item['url'])
        # Seed the list spider manually with e.g.:
        # lpush goodreads:start_urls https://www.goodreads.com/list/show/44615.Best_Contemporary_Women_s_Fiction
        return item
        
